/* Copyright (c) 2005 - 2012 Vertica, an HP company -*- Java -*- */ package com.vertica.squeal; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.InputStream; import java.io.InputStreamReader; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.StringReader; import java.io.StringWriter; import java.sql.Connection; import java.sql.DriverManager; import java.util.AbstractList; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.HashSet; import java.util.Map; import java.util.Properties; import java.util.jar.Attributes; import java.util.jar.JarFile; import java.util.jar.Manifest; import java.text.ParseException; import jline.ConsoleReader; import jline.ConsoleReaderInputStream; import jline.History; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.GenericOptionsParser; import org.apache.hadoop.fs.Path; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.log4j.PropertyConfigurator; import org.apache.pig.*; import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; import org.apache.pig.classification.InterfaceAudience; import org.apache.pig.classification.InterfaceStability; import org.apache.pig.ExecType; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.io.FileLocalizer; import org.apache.pig.impl.logicalLayer.*; import org.apache.pig.impl.util.JarManager; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.PropertiesUtil; import org.apache.pig.impl.util.UDFContext; import org.apache.pig.tools.pigstats.PigStatsUtil; import org.apache.pig.tools.cmdline.CmdLineParser; import org.apache.pig.tools.grunt.Grunt; import org.apache.pig.tools.grunt.GruntParser; import org.apache.pig.impl.util.LogUtils; import org.apache.pig.tools.timer.PerformanceTimerFactory; import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor; /** * Main class for Pig engine. */ @InterfaceAudience.LimitedPrivate({"Oozie"}) @InterfaceStability.Stable public class Squeal { private final static Log log = LogFactory.getLog(Squeal.class); /** Class name for Vertica JDBC Driver */ public static final String VERTICA_DRIVER_CLASS = "com.vertica.jdbc.Driver"; public static final String VERTICA_DRIVER_CLASS_41 = "com.vertica.Driver"; private static final String LOG4J_CONF = "log4jconf"; private static final String BRIEF = "brief"; private static final String DEBUG = "debug"; private static final String JAR = "jar"; private static final String VERBOSE = "verbose"; private static final String JDBCURL = "jdbc.url"; private static final String USERNAME = "jdbc.username"; private static final String PASSWORD = "jdbc.password"; private enum ExecMode {STRING, FILE, SHELL, UNKNOWN} /** * The Main-Class for the Pig Jar that will provide a shell and setup a classpath appropriate * for executing Jar files. Warning, this method calls System.exit(). * * @param args * -jar can be used to add additional jar files (colon separated). - will start a * shell. -e will execute the rest of the command line as if it was input to the * shell. * @throws IOException */ public static void main(String args[]) { int rc = 1; Properties properties = new Properties(); PropertiesUtil.loadPropertiesFromFile(properties); boolean verbose = false; boolean gruntCalled = false; String logFileName = null; boolean userSpecifiedLog = false; boolean runPig = false; try { BufferedReader pin = null; boolean debug = false; boolean dryrun = false; ArrayList<String> params = new ArrayList<String>(); ArrayList<String> paramFiles = new ArrayList<String>(); HashSet<String> optimizerRules = new HashSet<String>(); CmdLineParser opts = new CmdLineParser(args); opts.registerOpt('4', "log4jconf", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('b', "brief", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('c', "cluster", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('d', "debug", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('e', "execute", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('f', "file", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('h', "help", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('i', "version", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('j', "jar", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('l', "logfile", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('m', "param_file", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('p', "param", CmdLineParser.ValueExpected.OPTIONAL); opts.registerOpt('P', "pig", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('r', "dryrun", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('t', "optimizer_off", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('v', "verbose", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('w', "warning", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('x', "exectype", CmdLineParser.ValueExpected.REQUIRED); opts.registerOpt('F', "stop_on_failure", CmdLineParser.ValueExpected.NOT_ACCEPTED); opts.registerOpt('M', "no_multiquery", CmdLineParser.ValueExpected.NOT_ACCEPTED); ExecMode mode = ExecMode.UNKNOWN; String file = null; ExecType execType = ExecType.MAPREDUCE ; String execTypeString = properties.getProperty("exectype"); if(execTypeString!=null && execTypeString.length()>0){ execType = PigServer.parseExecType(execTypeString); } String cluster = "local"; String clusterConfigured = properties.getProperty("cluster"); if(clusterConfigured != null && clusterConfigured.length() > 0){ cluster = clusterConfigured; } //by default warning aggregation is on properties.setProperty("aggregate.warning", ""+true); //by default multiquery optimization is on properties.setProperty("opt.multiquery", ""+true); //by default we keep going on error on the backend properties.setProperty("stop.on.failure", ""+false); // set up client side system properties in UDF context UDFContext.getUDFContext().setClientSystemProps(); char opt; while ((opt = opts.getNextOpt()) != CmdLineParser.EndOfOpts) { switch (opt) { case '4': String log4jconf = opts.getValStr(); if(log4jconf != null){ properties.setProperty(LOG4J_CONF, log4jconf); } break; case 'b': properties.setProperty(BRIEF, "true"); break; case 'c': // Needed away to specify the cluster to run the MR job on // Bug 831708 - fixed String clusterParameter = opts.getValStr(); if (clusterParameter != null && clusterParameter.length() > 0) { cluster = clusterParameter; } break; case 'd': String logLevel = opts.getValStr(); if (logLevel != null) { properties.setProperty(DEBUG, logLevel); } debug = true; break; case 'e': mode = ExecMode.STRING; break; case 'f': mode = ExecMode.FILE; file = opts.getValStr(); break; case 'F': properties.setProperty("stop.on.failure", ""+true); break; case 'h': usage(); rc = 0; return; case 'i': System.out.println(getVersionString()); rc = 0; return; case 'j': String jarsString = opts.getValStr(); if(jarsString != null){ properties.setProperty(JAR, jarsString); } break; case 'l': //call to method that validates the path to the log file //and sets up the file to store the client side log file String logFileParameter = opts.getValStr(); if (logFileParameter != null && logFileParameter.length() > 0) { logFileName = validateLogFile(logFileParameter, null); } else { logFileName = validateLogFile(logFileName, null); } userSpecifiedLog = true; properties.setProperty("pig.logfile", (logFileName == null? "": logFileName)); break; case 'm': paramFiles.add(opts.getValStr()); break; case 'M': // turns off multiquery optimization log.info("Cannot turn off multiquery for Squeal"); //properties.setProperty("opt.multiquery",""+false); break; case 'p': String val = opts.getValStr(); params.add(opts.getValStr()); break; case 'P': runPig = true; break; case 'r': // currently only used for parameter substitution // will be extended in the future dryrun = true; break; case 't': optimizerRules.add(opts.getValStr()); break; case 'v': properties.setProperty(VERBOSE, ""+true); verbose = true; break; case 'w': properties.setProperty("aggregate.warning", ""+false); break; case 'x': try { execType = PigServer.parseExecType(opts.getValStr()); } catch (IOException e) { throw new RuntimeException("ERROR: Unrecognized exectype.", e); } break; default: { Character cc = Character.valueOf(opt); throw new AssertionError("Unhandled option " + cc.toString()); } } } // create the context with the parameter PigContext pigContext = new PigContext(execType, properties); if(logFileName == null && !userSpecifiedLog) { logFileName = validateLogFile(properties.getProperty("pig.logfile"), null); } if(logFileName != null) { log.info("Logging error messages to: " + logFileName); } pigContext.getProperties().setProperty("pig.logfile", (logFileName == null? "": logFileName)); // configure logging configureLog4J(properties, pigContext); if(optimizerRules.size() > 0) { pigContext.getProperties().setProperty("pig.optimizer.rules", ObjectSerializer.serialize(optimizerRules)); } if (properties.get("udf.import.list")!=null) PigContext.initializeImportList((String)properties.get("udf.import.list")); LogicalPlanBuilder.classloader = pigContext.createCl(null); // construct the parameter substitution preprocessor String remainders[] = opts.getRemainingArgs(); if (file == null) { if (remainders == null) throw new RuntimeException("You must specific a script to translate."); // They have a pig script they want us to run. if (remainders.length > 1) { throw new RuntimeException("You can only run one pig script " + "at a time from the command line."); } mode = ExecMode.FILE; file = remainders[0]; } BufferedReader in = new BufferedReader(new FileReader(file)); // run parameter substitution preprocessor first String substFile = file + ".substituted"; pin = runParamPreprocessor(in, params, paramFiles, substFile); if (!debug) { new File(substFile).deleteOnExit(); } // Set job name based on name of the script pigContext.getProperties().setProperty(PigContext.JOB_NAME, "PigLatin:" +new File(file).getName() ); log.info("Constructing server"); PigServer svr = new PigServer(pigContext); log.info("Configuring vertica connection"); Properties sysprops = System.getProperties(); String hostname = sysprops.getProperty("mapred.vertica.hostnames", "localhost"); String username = sysprops.getProperty("mapred.vertica.username", "dbadmin"); String password = sysprops.getProperty("mapred.vertica.password", ""); String database = sysprops.getProperty("mapred.vertica.database", ""); String port = sysprops.getProperty("mapred.vertica.port", ""); //InputScript is = new InputScript(pigContext); //is.readScript(substFile); BufferedReader infile = new BufferedReader(new FileReader(new File(substFile))); LOTranslator trans = new LOTranslator(); Connection conn = null; if (!dryrun) { try { Class.forName(VERTICA_DRIVER_CLASS); } catch (ClassNotFoundException e) { try { Class.forName(VERTICA_DRIVER_CLASS_41); } catch (ClassNotFoundException e2) { throw new RuntimeException(e); } } conn = DriverManager.getConnection("jdbc:vertica://" + hostname + ":" + port + "/" + database, username, password); } SquealParser parser = new SquealParser(infile, trans, conn, verbose, dryrun, runPig); parser.setParams(svr); parser.setInteractive(false); while (!parser.isDone()) { parser.parse(); } parser.cleanup(); return; // Per Utkarsh and Chris invocation of jar file via pig depricated. } catch (ParseException e) { usage(); rc = 2; } catch (NumberFormatException e) { usage(); rc = 2; } catch (PigException pe) { if(pe.retriable()) { rc = 1; } else { rc = 2; } pe.printStackTrace(); } catch (Throwable e) { rc = 2; e.printStackTrace(); } finally { // clear temp files FileLocalizer.deleteTempFiles(); PerformanceTimerFactory.getPerfTimerFactory().dumpTimers(); System.exit(rc); } } private static int getReturnCodeForStats(int[] stats) { if (stats[1] == 0) { // no failed jobs return 0; } else { if (stats[0] == 0) { // no succeeded jobs return 2; } else { // some jobs have failed return 3; } } } //TODO jz: log4j.properties should be used instead private static void configureLog4J(Properties properties, PigContext pigContext) { // TODO Add a file appender for the logs // TODO Need to create a property in the properties file for it. // sgroschupf, 25Feb2008: this method will be obsolete with PIG-115. String log4jconf = properties.getProperty(LOG4J_CONF); String trueString = "true"; boolean brief = trueString.equalsIgnoreCase(properties.getProperty(BRIEF)); Level logLevel = Level.INFO; String logLevelString = properties.getProperty(DEBUG); if (logLevelString != null){ logLevel = Level.toLevel(logLevelString, Level.INFO); } Properties props = new Properties(); FileReader propertyReader = null; if (log4jconf != null) { try { propertyReader = new FileReader(log4jconf); props.load(propertyReader); } catch (IOException e) { System.err.println("Warn: Cannot open log4j properties file, use default"); } finally { if (propertyReader != null) try {propertyReader.close();} catch(Exception e) {} } } if (props.size() == 0) { props.setProperty("log4j.logger.org.apache.pig", logLevel.toString()); if((logLevelString = System.getProperty("pig.logfile.level")) == null){ props.setProperty("log4j.rootLogger", "INFO, PIGCONSOLE"); } else{ logLevel = Level.toLevel(logLevelString, Level.INFO); props.setProperty("log4j.logger.org.apache.pig", logLevel.toString()); props.setProperty("log4j.rootLogger", "INFO, PIGCONSOLE, F"); props.setProperty("log4j.appender.F","org.apache.log4j.RollingFileAppender"); props.setProperty("log4j.appender.F.File",properties.getProperty("pig.logfile")); props.setProperty("log4j.appender.F.layout","org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.F.layout.ConversionPattern", brief ? "%m%n" : "%d [%t] %-5p %c - %m%n"); } props.setProperty("log4j.appender.PIGCONSOLE","org.apache.log4j.ConsoleAppender"); props.setProperty("log4j.appender.PIGCONSOLE.target", "System.err"); props.setProperty("log4j.appender.PIGCONSOLE.layout","org.apache.log4j.PatternLayout"); props.setProperty("log4j.appender.PIGCONSOLE.layout.ConversionPattern", brief ? "%m%n" : "%d [%t] %-5p %c - %m%n"); } PropertyConfigurator.configure(props); logLevel = Logger.getLogger("org.apache.pig").getLevel(); Properties backendProps = pigContext.getLog4jProperties(); backendProps.setProperty("log4j.logger.org.apache.pig.level", logLevel.toString()); pigContext.setLog4jProperties(backendProps); pigContext.setDefaultLogLevel(logLevel); } // returns the stream of final pig script to be passed to Grunt private static BufferedReader runParamPreprocessor(BufferedReader origPigScript, ArrayList<String> params, ArrayList<String> paramFiles, String scriptFile) throws org.apache.pig.tools.parameters.ParseException, IOException{ ParameterSubstitutionPreprocessor psp = new ParameterSubstitutionPreprocessor(50); String[] type1 = new String[1]; String[] type2 = new String[1]; BufferedWriter fw = new BufferedWriter(new FileWriter(scriptFile)); psp.genSubstitutedFile (origPigScript, fw, params.size() > 0 ? params.toArray(type1) : null, paramFiles.size() > 0 ? paramFiles.toArray(type2) : null); return new BufferedReader(new FileReader (scriptFile)); } private static String getVersionString() { String findContainingJar = JarManager.findContainingJar(Main.class); try { JarFile jar = new JarFile(findContainingJar); final Manifest manifest = jar.getManifest(); final Map <String,Attributes> attrs = manifest.getEntries(); Attributes attr = attrs.get("org/apache/pig"); String version = attr.getValue("Implementation-Version"); String svnRevision = attr.getValue("Svn-Revision"); String buildTime = attr.getValue("Build-TimeStamp"); // we use a version string similar to svn //svn, version 1.4.4 (r25188) // compiled Sep 23 2007, 22:32:34 return "Apache Pig version " + version + " (r" + svnRevision + ") \ncompiled "+buildTime; } catch (Exception e) { throw new RuntimeException("unable to read pigs manifest file", e); } } /** * Print usage string. */ public static void usage() { System.out.println("\n"+getVersionString()+"\n"); System.out.println("USAGE: Squeal [options] file : Translate script to SQL."); System.out.println(" options include:"); System.out.println(" -4, -log4jconf log4j configuration file, overrides log conf"); System.out.println(" -b, -brief brief logging (no timestamps)"); System.out.println(" -c, -cluster clustername, kryptonite is default"); System.out.println(" -d, -debug debug level, INFO is default"); System.out.println(" -h, -help display this message"); System.out.println(" -i, -version display version information"); System.out.println(" -j, -jar jarfile load jarfile"); System.out.println(" -l, -logfile path to client side log file; current working directory is default"); System.out.println(" -m, -param_file path to the parameter file"); System.out.println(" -p, -param key value pair of the form param=val"); System.out.println(" -t, -optimizer_off optimizer rule name, turn optimizer off for this rule; use all to turn all rules off, optimizer is turned on by default"); System.out.println(" -v, -verbose print all error messages to screen"); System.out.println(" -w, -warning turn warning on; also turns warning aggregation off"); System.out.println(" -x, -exectype local|mapreduce, mapreduce is default"); System.out.println(" -F, -stop_on_failure aborts execution on the first failed job; off by default"); System.out.println(" -M, -no_multiquery turn multiquery optimization off; Multiquery is on by default"); } private static String validateLogFile(String logFileName, String scriptName) { String strippedDownScriptName = null; if(scriptName != null) { File scriptFile = new File(scriptName); if(!scriptFile.isDirectory()) { String scriptFileAbsPath; try { scriptFileAbsPath = scriptFile.getCanonicalPath(); } catch (IOException ioe) { log.warn("Could not compute canonical path to the script file " + ioe.getMessage()); return null; } strippedDownScriptName = getFileFromCanonicalPath(scriptFileAbsPath); } } String defaultLogFileName = (strippedDownScriptName == null ? "pig_" : strippedDownScriptName) + new Date().getTime() + ".log"; File logFile; if(logFileName != null) { logFile = new File(logFileName); //Check if the file name is a directory //append the default file name to the file if(logFile.isDirectory()) { if(logFile.canWrite()) { try { logFileName = logFile.getCanonicalPath() + File.separator + defaultLogFileName; } catch (IOException ioe) { log.warn("Could not compute canonical path to the log file " + ioe.getMessage()); return null; } return logFileName; } else { log.warn("Need write permission in the directory: " + logFileName + " to create log file."); return null; } } else { //we have a relative path or an absolute path to the log file //check if we can write to the directory where this file is/will be stored if (logFile.exists()) { if(logFile.canWrite()) { try { logFileName = new File(logFileName).getCanonicalPath(); } catch (IOException ioe) { log.warn("Could not compute canonical path to the log file " + ioe.getMessage()); return null; } return logFileName; } else { //do not have write permissions for the log file //bail out with an error message log.warn("Cannot write to file: " + logFileName + ". Need write permission."); return logFileName; } } else { logFile = logFile.getParentFile(); if(logFile != null) { //if the directory is writable we are good to go if(logFile.canWrite()) { try { logFileName = new File(logFileName).getCanonicalPath(); } catch (IOException ioe) { log.warn("Could not compute canonical path to the log file " + ioe.getMessage()); return null; } return logFileName; } else { log.warn("Need write permission in the directory: " + logFile + " to create log file."); return logFileName; } }//end if logFile != null else is the default in fall through }//end else part of logFile.exists() }//end else part of logFile.isDirectory() }//end if logFileName != null //file name is null or its in the current working directory //revert to the current working directory String currDir = System.getProperty("user.dir"); logFile = new File(currDir); logFileName = currDir + File.separator + (logFileName == null? defaultLogFileName : logFileName); if(logFile.canWrite()) { return logFileName; } log.warn("Cannot write to log file: " + logFileName); return null; } private static String getFileFromCanonicalPath(String canonicalPath) { return canonicalPath.substring(canonicalPath.lastIndexOf(File.separator)); } }